{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Clustering Online Retail Sales Data\n",
    "\n",
    "Dataset: https://archive.ics.uci.edu/ml/datasets/online+retail"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn import preprocessing, metrics, cluster\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>InvoiceNo</th>\n",
       "      <th>StockCode</th>\n",
       "      <th>Description</th>\n",
       "      <th>Quantity</th>\n",
       "      <th>InvoiceDate</th>\n",
       "      <th>UnitPrice</th>\n",
       "      <th>CustomerID</th>\n",
       "      <th>Country</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>536365</td>\n",
       "      <td>85123A</td>\n",
       "      <td>WHITE HANGING HEART T-LIGHT HOLDER</td>\n",
       "      <td>6</td>\n",
       "      <td>2010-12-01 08:26:00</td>\n",
       "      <td>2.55</td>\n",
       "      <td>17850.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>536365</td>\n",
       "      <td>71053</td>\n",
       "      <td>WHITE METAL LANTERN</td>\n",
       "      <td>6</td>\n",
       "      <td>2010-12-01 08:26:00</td>\n",
       "      <td>3.39</td>\n",
       "      <td>17850.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>536365</td>\n",
       "      <td>84406B</td>\n",
       "      <td>CREAM CUPID HEARTS COAT HANGER</td>\n",
       "      <td>8</td>\n",
       "      <td>2010-12-01 08:26:00</td>\n",
       "      <td>2.75</td>\n",
       "      <td>17850.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>536365</td>\n",
       "      <td>84029G</td>\n",
       "      <td>KNITTED UNION FLAG HOT WATER BOTTLE</td>\n",
       "      <td>6</td>\n",
       "      <td>2010-12-01 08:26:00</td>\n",
       "      <td>3.39</td>\n",
       "      <td>17850.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>536365</td>\n",
       "      <td>84029E</td>\n",
       "      <td>RED WOOLLY HOTTIE WHITE HEART.</td>\n",
       "      <td>6</td>\n",
       "      <td>2010-12-01 08:26:00</td>\n",
       "      <td>3.39</td>\n",
       "      <td>17850.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  InvoiceNo StockCode                          Description  Quantity  \\\n",
       "0    536365    85123A   WHITE HANGING HEART T-LIGHT HOLDER         6   \n",
       "1    536365     71053                  WHITE METAL LANTERN         6   \n",
       "2    536365    84406B       CREAM CUPID HEARTS COAT HANGER         8   \n",
       "3    536365    84029G  KNITTED UNION FLAG HOT WATER BOTTLE         6   \n",
       "4    536365    84029E       RED WOOLLY HOTTIE WHITE HEART.         6   \n",
       "\n",
       "          InvoiceDate  UnitPrice  CustomerID         Country  \n",
       "0 2010-12-01 08:26:00       2.55     17850.0  United Kingdom  \n",
       "1 2010-12-01 08:26:00       3.39     17850.0  United Kingdom  \n",
       "2 2010-12-01 08:26:00       2.75     17850.0  United Kingdom  \n",
       "3 2010-12-01 08:26:00       3.39     17850.0  United Kingdom  \n",
       "4 2010-12-01 08:26:00       3.39     17850.0  United Kingdom  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Workbook holding the Online Retail dataset.\n",
    "DATA_PATH = \"/data/Online Retail.xlsx\"\n",
    "\n",
    "df = pd.read_excel(DATA_PATH)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Find out whether any of the columns contain NaN values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 541909 entries, 0 to 541908\n",
      "Data columns (total 8 columns):\n",
      "InvoiceNo      541909 non-null object\n",
      "StockCode      541909 non-null object\n",
      "Description    540455 non-null object\n",
      "Quantity       541909 non-null int64\n",
      "InvoiceDate    541909 non-null datetime64[ns]\n",
      "UnitPrice      541909 non-null float64\n",
      "CustomerID     406829 non-null float64\n",
      "Country        541909 non-null object\n",
      "dtypes: datetime64[ns](1), float64(2), int64(1), object(4)\n",
      "memory usage: 33.1+ MB\n"
     ]
    }
   ],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "InvoiceNo      False\n",
       "StockCode      False\n",
       "Description     True\n",
       "Quantity       False\n",
       "InvoiceDate    False\n",
       "UnitPrice      False\n",
       "CustomerID      True\n",
       "Country        False\n",
       "dtype: bool"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.isna().any()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Drop all records that have a NaN CustomerID"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 406829 entries, 0 to 541908\n",
      "Data columns (total 8 columns):\n",
      "InvoiceNo      406829 non-null object\n",
      "StockCode      406829 non-null object\n",
      "Description    406829 non-null object\n",
      "Quantity       406829 non-null int64\n",
      "InvoiceDate    406829 non-null datetime64[ns]\n",
      "UnitPrice      406829 non-null float64\n",
      "CustomerID     406829 non-null float64\n",
      "Country        406829 non-null object\n",
      "dtypes: datetime64[ns](1), float64(2), int64(1), object(4)\n",
      "memory usage: 27.9+ MB\n"
     ]
    }
   ],
   "source": [
    "# Keep only the rows whose CustomerID is present.\n",
    "has_customer = df[\"CustomerID\"].notna()\n",
    "df = df[has_customer]\n",
    "df.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Find number of unique values for each column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "InvoiceNo      22190\n",
       "StockCode       3684\n",
       "Description     3896\n",
       "Quantity         436\n",
       "InvoiceDate    20460\n",
       "UnitPrice        620\n",
       "CustomerID      4372\n",
       "Country           37\n",
       "dtype: int64"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.nunique()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Use describe to find range of each column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Quantity</th>\n",
       "      <th>UnitPrice</th>\n",
       "      <th>CustomerID</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>406829.000000</td>\n",
       "      <td>406829.000000</td>\n",
       "      <td>406829.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>12.061303</td>\n",
       "      <td>3.460471</td>\n",
       "      <td>15287.690570</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>248.693370</td>\n",
       "      <td>69.315162</td>\n",
       "      <td>1713.600303</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>-80995.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>12346.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.250000</td>\n",
       "      <td>13953.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>5.000000</td>\n",
       "      <td>1.950000</td>\n",
       "      <td>15152.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>12.000000</td>\n",
       "      <td>3.750000</td>\n",
       "      <td>16791.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>80995.000000</td>\n",
       "      <td>38970.000000</td>\n",
       "      <td>18287.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            Quantity      UnitPrice     CustomerID\n",
       "count  406829.000000  406829.000000  406829.000000\n",
       "mean       12.061303       3.460471   15287.690570\n",
       "std       248.693370      69.315162    1713.600303\n",
       "min    -80995.000000       0.000000   12346.000000\n",
       "25%         2.000000       1.250000   13953.000000\n",
       "50%         5.000000       1.950000   15152.000000\n",
       "75%        12.000000       3.750000   16791.000000\n",
       "max     80995.000000   38970.000000   18287.000000"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We see that the Quantity column has a large negative minimum. Negative quantities are probably not valid for this analysis, so drop the records that do not have a positive Quantity. Then verify that there are no more negative values in the columns."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 397924 entries, 0 to 541908\n",
      "Data columns (total 8 columns):\n",
      "InvoiceNo      397924 non-null object\n",
      "StockCode      397924 non-null object\n",
      "Description    397924 non-null object\n",
      "Quantity       397924 non-null int64\n",
      "InvoiceDate    397924 non-null datetime64[ns]\n",
      "UnitPrice      397924 non-null float64\n",
      "CustomerID     397924 non-null float64\n",
      "Country        397924 non-null object\n",
      "dtypes: datetime64[ns](1), float64(2), int64(1), object(4)\n",
      "memory usage: 27.3+ MB\n"
     ]
    }
   ],
   "source": [
    "# Keep strictly positive quantities (this also drops zero-quantity rows, if any).\n",
    "positive_quantity = df[\"Quantity\"] > 0\n",
    "df = df[positive_quantity]\n",
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Quantity</th>\n",
       "      <th>UnitPrice</th>\n",
       "      <th>CustomerID</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>397924.000000</td>\n",
       "      <td>397924.000000</td>\n",
       "      <td>397924.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>13.021823</td>\n",
       "      <td>3.116174</td>\n",
       "      <td>15294.315171</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>180.420210</td>\n",
       "      <td>22.096788</td>\n",
       "      <td>1713.169877</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>12346.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.250000</td>\n",
       "      <td>13969.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>6.000000</td>\n",
       "      <td>1.950000</td>\n",
       "      <td>15159.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>12.000000</td>\n",
       "      <td>3.750000</td>\n",
       "      <td>16795.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>80995.000000</td>\n",
       "      <td>8142.750000</td>\n",
       "      <td>18287.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            Quantity      UnitPrice     CustomerID\n",
       "count  397924.000000  397924.000000  397924.000000\n",
       "mean       13.021823       3.116174   15294.315171\n",
       "std       180.420210      22.096788    1713.169877\n",
       "min         1.000000       0.000000   12346.000000\n",
       "25%         2.000000       1.250000   13969.000000\n",
       "50%         6.000000       1.950000   15159.000000\n",
       "75%        12.000000       3.750000   16795.000000\n",
       "max     80995.000000    8142.750000   18287.000000"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>InvoiceNo</th>\n",
       "      <th>StockCode</th>\n",
       "      <th>Description</th>\n",
       "      <th>Quantity</th>\n",
       "      <th>InvoiceDate</th>\n",
       "      <th>UnitPrice</th>\n",
       "      <th>CustomerID</th>\n",
       "      <th>Country</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>536365</td>\n",
       "      <td>85123A</td>\n",
       "      <td>WHITE HANGING HEART T-LIGHT HOLDER</td>\n",
       "      <td>6</td>\n",
       "      <td>2010-12-01 08:26:00</td>\n",
       "      <td>2.55</td>\n",
       "      <td>17850.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>536365</td>\n",
       "      <td>71053</td>\n",
       "      <td>WHITE METAL LANTERN</td>\n",
       "      <td>6</td>\n",
       "      <td>2010-12-01 08:26:00</td>\n",
       "      <td>3.39</td>\n",
       "      <td>17850.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>536365</td>\n",
       "      <td>84406B</td>\n",
       "      <td>CREAM CUPID HEARTS COAT HANGER</td>\n",
       "      <td>8</td>\n",
       "      <td>2010-12-01 08:26:00</td>\n",
       "      <td>2.75</td>\n",
       "      <td>17850.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>536365</td>\n",
       "      <td>84029G</td>\n",
       "      <td>KNITTED UNION FLAG HOT WATER BOTTLE</td>\n",
       "      <td>6</td>\n",
       "      <td>2010-12-01 08:26:00</td>\n",
       "      <td>3.39</td>\n",
       "      <td>17850.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>536365</td>\n",
       "      <td>84029E</td>\n",
       "      <td>RED WOOLLY HOTTIE WHITE HEART.</td>\n",
       "      <td>6</td>\n",
       "      <td>2010-12-01 08:26:00</td>\n",
       "      <td>3.39</td>\n",
       "      <td>17850.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  InvoiceNo StockCode                          Description  Quantity  \\\n",
       "0    536365    85123A   WHITE HANGING HEART T-LIGHT HOLDER         6   \n",
       "1    536365     71053                  WHITE METAL LANTERN         6   \n",
       "2    536365    84406B       CREAM CUPID HEARTS COAT HANGER         8   \n",
       "3    536365    84029G  KNITTED UNION FLAG HOT WATER BOTTLE         6   \n",
       "4    536365    84029E       RED WOOLLY HOTTIE WHITE HEART.         6   \n",
       "\n",
       "          InvoiceDate  UnitPrice  CustomerID         Country  \n",
       "0 2010-12-01 08:26:00       2.55     17850.0  United Kingdom  \n",
       "1 2010-12-01 08:26:00       3.39     17850.0  United Kingdom  \n",
       "2 2010-12-01 08:26:00       2.75     17850.0  United Kingdom  \n",
       "3 2010-12-01 08:26:00       3.39     17850.0  United Kingdom  \n",
       "4 2010-12-01 08:26:00       3.39     17850.0  United Kingdom  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Convert the InvoiceDate to datetime."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# df.info() above already reports InvoiceDate as datetime64[ns]; this keeps the dtype explicit.\n",
    "df[\"InvoiceDate\"] = pd.to_datetime(df[\"InvoiceDate\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>InvoiceNo</th>\n",
       "      <th>StockCode</th>\n",
       "      <th>Description</th>\n",
       "      <th>Quantity</th>\n",
       "      <th>InvoiceDate</th>\n",
       "      <th>UnitPrice</th>\n",
       "      <th>CustomerID</th>\n",
       "      <th>Country</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>536365</td>\n",
       "      <td>85123A</td>\n",
       "      <td>WHITE HANGING HEART T-LIGHT HOLDER</td>\n",
       "      <td>6</td>\n",
       "      <td>2010-12-01 08:26:00</td>\n",
       "      <td>2.55</td>\n",
       "      <td>17850.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>536365</td>\n",
       "      <td>71053</td>\n",
       "      <td>WHITE METAL LANTERN</td>\n",
       "      <td>6</td>\n",
       "      <td>2010-12-01 08:26:00</td>\n",
       "      <td>3.39</td>\n",
       "      <td>17850.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>536365</td>\n",
       "      <td>84406B</td>\n",
       "      <td>CREAM CUPID HEARTS COAT HANGER</td>\n",
       "      <td>8</td>\n",
       "      <td>2010-12-01 08:26:00</td>\n",
       "      <td>2.75</td>\n",
       "      <td>17850.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>536365</td>\n",
       "      <td>84029G</td>\n",
       "      <td>KNITTED UNION FLAG HOT WATER BOTTLE</td>\n",
       "      <td>6</td>\n",
       "      <td>2010-12-01 08:26:00</td>\n",
       "      <td>3.39</td>\n",
       "      <td>17850.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>536365</td>\n",
       "      <td>84029E</td>\n",
       "      <td>RED WOOLLY HOTTIE WHITE HEART.</td>\n",
       "      <td>6</td>\n",
       "      <td>2010-12-01 08:26:00</td>\n",
       "      <td>3.39</td>\n",
       "      <td>17850.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  InvoiceNo StockCode                          Description  Quantity  \\\n",
       "0    536365    85123A   WHITE HANGING HEART T-LIGHT HOLDER         6   \n",
       "1    536365     71053                  WHITE METAL LANTERN         6   \n",
       "2    536365    84406B       CREAM CUPID HEARTS COAT HANGER         8   \n",
       "3    536365    84029G  KNITTED UNION FLAG HOT WATER BOTTLE         6   \n",
       "4    536365    84029E       RED WOOLLY HOTTIE WHITE HEART.         6   \n",
       "\n",
       "          InvoiceDate  UnitPrice  CustomerID         Country  \n",
       "0 2010-12-01 08:26:00       2.55     17850.0  United Kingdom  \n",
       "1 2010-12-01 08:26:00       3.39     17850.0  United Kingdom  \n",
       "2 2010-12-01 08:26:00       2.75     17850.0  United Kingdom  \n",
       "3 2010-12-01 08:26:00       3.39     17850.0  United Kingdom  \n",
       "4 2010-12-01 08:26:00       3.39     17850.0  United Kingdom  "
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create a calculated field to compute TotalPrice"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>InvoiceNo</th>\n",
       "      <th>StockCode</th>\n",
       "      <th>Description</th>\n",
       "      <th>Quantity</th>\n",
       "      <th>InvoiceDate</th>\n",
       "      <th>UnitPrice</th>\n",
       "      <th>CustomerID</th>\n",
       "      <th>Country</th>\n",
       "      <th>TotalPrice</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>536365</td>\n",
       "      <td>85123A</td>\n",
       "      <td>WHITE HANGING HEART T-LIGHT HOLDER</td>\n",
       "      <td>6</td>\n",
       "      <td>2010-12-01 08:26:00</td>\n",
       "      <td>2.55</td>\n",
       "      <td>17850.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>15.30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>536365</td>\n",
       "      <td>71053</td>\n",
       "      <td>WHITE METAL LANTERN</td>\n",
       "      <td>6</td>\n",
       "      <td>2010-12-01 08:26:00</td>\n",
       "      <td>3.39</td>\n",
       "      <td>17850.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>20.34</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>536365</td>\n",
       "      <td>84406B</td>\n",
       "      <td>CREAM CUPID HEARTS COAT HANGER</td>\n",
       "      <td>8</td>\n",
       "      <td>2010-12-01 08:26:00</td>\n",
       "      <td>2.75</td>\n",
       "      <td>17850.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>22.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>536365</td>\n",
       "      <td>84029G</td>\n",
       "      <td>KNITTED UNION FLAG HOT WATER BOTTLE</td>\n",
       "      <td>6</td>\n",
       "      <td>2010-12-01 08:26:00</td>\n",
       "      <td>3.39</td>\n",
       "      <td>17850.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>20.34</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>536365</td>\n",
       "      <td>84029E</td>\n",
       "      <td>RED WOOLLY HOTTIE WHITE HEART.</td>\n",
       "      <td>6</td>\n",
       "      <td>2010-12-01 08:26:00</td>\n",
       "      <td>3.39</td>\n",
       "      <td>17850.0</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>20.34</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  InvoiceNo StockCode                          Description  Quantity  \\\n",
       "0    536365    85123A   WHITE HANGING HEART T-LIGHT HOLDER         6   \n",
       "1    536365     71053                  WHITE METAL LANTERN         6   \n",
       "2    536365    84406B       CREAM CUPID HEARTS COAT HANGER         8   \n",
       "3    536365    84029G  KNITTED UNION FLAG HOT WATER BOTTLE         6   \n",
       "4    536365    84029E       RED WOOLLY HOTTIE WHITE HEART.         6   \n",
       "\n",
       "          InvoiceDate  UnitPrice  CustomerID         Country  TotalPrice  \n",
       "0 2010-12-01 08:26:00       2.55     17850.0  United Kingdom       15.30  \n",
       "1 2010-12-01 08:26:00       3.39     17850.0  United Kingdom       20.34  \n",
       "2 2010-12-01 08:26:00       2.75     17850.0  United Kingdom       22.00  \n",
       "3 2010-12-01 08:26:00       3.39     17850.0  United Kingdom       20.34  \n",
       "4 2010-12-01 08:26:00       3.39     17850.0  United Kingdom       20.34  "
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Line total for each invoice row: quantity bought times the unit price.\n",
    "df[\"TotalPrice\"] = df[\"Quantity\"] * df[\"UnitPrice\"]\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To calculate recency, use the maximum InvoiceDate as the point of reference."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Timestamp('2011-12-09 12:50:00')"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Latest invoice timestamp in the cleaned data; used as the reference point for recency.\n",
    "last_date = df[\"InvoiceDate\"].max()\n",
    "last_date"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Calculate the R-F-M (recency, frequency, monetary) values for each customer."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>InvoiceDate</th>\n",
       "      <th>InvoiceNo</th>\n",
       "      <th>TotalPrice</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CustomerID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>12346.0</th>\n",
       "      <td>325</td>\n",
       "      <td>1</td>\n",
       "      <td>77183.60</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12347.0</th>\n",
       "      <td>1</td>\n",
       "      <td>182</td>\n",
       "      <td>4310.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12348.0</th>\n",
       "      <td>74</td>\n",
       "      <td>31</td>\n",
       "      <td>1797.24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12349.0</th>\n",
       "      <td>18</td>\n",
       "      <td>73</td>\n",
       "      <td>1757.55</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12350.0</th>\n",
       "      <td>309</td>\n",
       "      <td>17</td>\n",
       "      <td>334.40</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            InvoiceDate  InvoiceNo  TotalPrice\n",
       "CustomerID                                    \n",
       "12346.0             325          1    77183.60\n",
       "12347.0               1        182     4310.00\n",
       "12348.0              74         31     1797.24\n",
       "12349.0              18         73     1757.55\n",
       "12350.0             309         17      334.40"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Collapse the transaction rows into one row per customer holding the raw R-F-M inputs.\n",
    "rfm = df.groupby(\"CustomerID\").agg({\n",
    "    # whole days between last_date and the customer's most recent InvoiceDate\n",
    "    \"InvoiceDate\": lambda dates: (last_date - dates.max()).days,\n",
    "    # number of rows recorded for the customer\n",
    "    \"InvoiceNo\": lambda invoice_nos: len(invoice_nos),\n",
    "    # sum of TotalPrice over the customer's rows\n",
    "    \"TotalPrice\": lambda amounts: np.sum(amounts),\n",
    "})\n",
    "\n",
    "rfm.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Rename the columns - \"recency\", \"frequency\", \"monetary\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>recency</th>\n",
       "      <th>frequency</th>\n",
       "      <th>monetary</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CustomerID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>12346.0</th>\n",
       "      <td>325</td>\n",
       "      <td>1</td>\n",
       "      <td>77183.60</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12347.0</th>\n",
       "      <td>1</td>\n",
       "      <td>182</td>\n",
       "      <td>4310.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12348.0</th>\n",
       "      <td>74</td>\n",
       "      <td>31</td>\n",
       "      <td>1797.24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12349.0</th>\n",
       "      <td>18</td>\n",
       "      <td>73</td>\n",
       "      <td>1757.55</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12350.0</th>\n",
       "      <td>309</td>\n",
       "      <td>17</td>\n",
       "      <td>334.40</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            recency  frequency  monetary\n",
       "CustomerID                              \n",
       "12346.0         325          1  77183.60\n",
       "12347.0           1        182   4310.00\n",
       "12348.0          74         31   1797.24\n",
       "12349.0          18         73   1757.55\n",
       "12350.0         309         17    334.40"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rename by label rather than by position: the result no longer depends on the column\n",
    "# order produced by the aggregation, and re-running this cell after more columns have\n",
    "# been added to rfm is a harmless no-op instead of a length-mismatch error.\n",
    "rfm = rfm.rename(columns={\n",
    "    \"InvoiceDate\": \"recency\",\n",
    "    \"InvoiceNo\": \"frequency\",\n",
    "    \"TotalPrice\": \"monetary\",\n",
    "})\n",
    "rfm.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Digitize the recency, frequency and monetary columns into 5 buckets of roughly equal size. To achieve this, use the 20th, 40th, 60th, 80th and 100th percentile values of each column as the bucket boundaries."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 20,  40,  60,  80, 100])"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "quantiles = np.arange(1, 6) * 20\n",
    "quantiles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>recency</th>\n",
       "      <th>frequency</th>\n",
       "      <th>monetary</th>\n",
       "      <th>r_score</th>\n",
       "      <th>m_score</th>\n",
       "      <th>f_score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CustomerID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>12346.0</th>\n",
       "      <td>325</td>\n",
       "      <td>1</td>\n",
       "      <td>77183.60</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12347.0</th>\n",
       "      <td>1</td>\n",
       "      <td>182</td>\n",
       "      <td>4310.00</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12348.0</th>\n",
       "      <td>74</td>\n",
       "      <td>31</td>\n",
       "      <td>1797.24</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12349.0</th>\n",
       "      <td>18</td>\n",
       "      <td>73</td>\n",
       "      <td>1757.55</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12350.0</th>\n",
       "      <td>309</td>\n",
       "      <td>17</td>\n",
       "      <td>334.40</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            recency  frequency  monetary  r_score  m_score  f_score\n",
       "CustomerID                                                         \n",
       "12346.0         325          1  77183.60        1        5        1\n",
       "12347.0           1        182   4310.00        5        5        5\n",
       "12348.0          74         31   1797.24        2        4        3\n",
       "12349.0          18         73   1757.55        4        4        4\n",
       "12350.0         309         17    334.40        1        2        2"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Bucket each column at its own percentile edges. With right=True and edges that end at\n",
    "# the 100th percentile, np.digitize yields bucket indices 0-4, which become 1-5 scores.\n",
    "# Recency is inverted (5 - bucket) so that smaller recency values get the higher score.\n",
    "rfm[\"r_score\"] = 5 - np.digitize(\n",
    "    rfm[\"recency\"], bins=np.percentile(rfm[\"recency\"], quantiles), right=True\n",
    ")\n",
    "rfm[\"m_score\"] = 1 + np.digitize(\n",
    "    rfm[\"monetary\"], bins=np.percentile(rfm[\"monetary\"], quantiles), right=True\n",
    ")\n",
    "rfm[\"f_score\"] = 1 + np.digitize(\n",
    "    rfm[\"frequency\"], bins=np.percentile(rfm[\"frequency\"], quantiles), right=True\n",
    ")\n",
    "\n",
    "rfm.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>recency</th>\n",
       "      <th>frequency</th>\n",
       "      <th>monetary</th>\n",
       "      <th>r_score</th>\n",
       "      <th>m_score</th>\n",
       "      <th>f_score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CustomerID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>13037.0</th>\n",
       "      <td>137</td>\n",
       "      <td>48</td>\n",
       "      <td>881.07</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15864.0</th>\n",
       "      <td>21</td>\n",
       "      <td>20</td>\n",
       "      <td>1936.32</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13848.0</th>\n",
       "      <td>91</td>\n",
       "      <td>5</td>\n",
       "      <td>1255.00</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15274.0</th>\n",
       "      <td>3</td>\n",
       "      <td>47</td>\n",
       "      <td>716.57</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17865.0</th>\n",
       "      <td>30</td>\n",
       "      <td>370</td>\n",
       "      <td>10526.32</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17398.0</th>\n",
       "      <td>114</td>\n",
       "      <td>38</td>\n",
       "      <td>653.38</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12945.0</th>\n",
       "      <td>287</td>\n",
       "      <td>23</td>\n",
       "      <td>462.95</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16737.0</th>\n",
       "      <td>52</td>\n",
       "      <td>1</td>\n",
       "      <td>417.60</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14796.0</th>\n",
       "      <td>0</td>\n",
       "      <td>1141</td>\n",
       "      <td>8022.49</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16657.0</th>\n",
       "      <td>9</td>\n",
       "      <td>208</td>\n",
       "      <td>933.62</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            recency  frequency  monetary  r_score  m_score  f_score\n",
       "CustomerID                                                         \n",
       "13037.0         137         48    881.07        2        3        3\n",
       "15864.0          21         20   1936.32        4        4        2\n",
       "13848.0          91          5   1255.00        2        4        1\n",
       "15274.0           3         47    716.57        5        3        3\n",
       "17865.0          30        370  10526.32        4        5        5\n",
       "17398.0         114         38    653.38        2        3        3\n",
       "12945.0         287         23    462.95        1        2        2\n",
       "16737.0          52          1    417.60        3        2        1\n",
       "14796.0           0       1141   8022.49        5        5        5\n",
       "16657.0           9        208    933.62        5        3        5"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show a seeded (reproducible) random sample of 10 rows of rfm.\n",
    "rfm.sample(n=10, random_state=123)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-1.4252185 , -1.3840666 ,  1.4140507 ],\n",
       "       [ 1.4030564 ,  1.4140533 ,  1.4140507 ],\n",
       "       [-0.7181498 ,  0.01499338,  0.70702535],\n",
       "       ...,\n",
       "       [ 1.4030564 , -1.3840666 , -1.4140507 ],\n",
       "       [ 1.4030564 ,  1.4140533 ,  1.4140507 ],\n",
       "       [-0.01108105,  0.7145233 ,  0.70702535]], dtype=float32)"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build the clustering input: the three score columns, cast to float32 and standardised.\n",
    "scaler = preprocessing.StandardScaler()\n",
    "X = scaler.fit_transform(\n",
    "    rfm[[\"r_score\", \"f_score\", \"m_score\"]].values.astype(\"float32\")\n",
    ")\n",
    "X"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Find what could be an optimal number of clusters using an elbow plot. As we see in the plot below, we can use 5 or 6 clusters (K) for the KMeans algorithm."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Text(0, 0.5, 'Inertia Score')"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAEKCAYAAAAFJbKyAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xl8VNX5x/HPk50sEBIStoQtBBBFtgAq1ipuuFS0blhbcKloXarVurX9adXa1aXa/tQfVStaCyJqpW6IIFqxCmHfhIRFCGsgbCEQSPL8/rgnOGCWGWByM8nzfr3mNTNnzp37jC18uffce46oKsYYY0ywovwuwBhjTGSx4DDGGBMSCw5jjDEhseAwxhgTEgsOY4wxIbHgMMYYExILDmOMMSGx4DDGGBMSCw5jjDEhifG7gHBo06aNdunSxe8yjDEmosyZM2erqmbU169JBkeXLl3Iz8/3uwxjjIkoIvJ1MP3sVJUxxpiQWHAYY4wJiQWHMcaYkFhwGGOMCYkFhzHGmJBYcBhjjAmJBYcxxpiQNMng2LBjL+UVlX6XYYwxTVKTDI5te/bz5NQCv8swxpgmKazBISKpIjJJRL4SkWUicrKIpInIVBEpcM+tXV8RkadFpFBEForIgIDvGe36F4jI6Pr2m5YUx9hPVzLn65Jw/jxjjGmWwn3E8RTwgar2AvoCy4D7gGmqmgtMc+8BzgNy3WMM8CyAiKQBDwJDgMHAg9VhU5v2rRLokNqCuyYuoGx/xbH/VcYY04yFLThEpCVwGvACgKruV9UdwAhgnOs2DrjYvR4BvKyeL4BUEWkPnAtMVdUSVd0OTAWG17XvKBEeu7wva7aV8fv3vzrmv80YY5qzcB5xdAOKgb+LyDwReV5EkoC2qroRwD1nuv4dgXUB2xe5ttra63RSt3SuG9qVl//7NZ8VbD36X2OMMQYIb3DEAAOAZ1W1P7CHb05L1URqaNM62g/dWGSMiOSLSH5xcTEA9wzvSbeMJO6ZtIBd+w6E/AOMMcZ8WziDowgoUtUv3ftJeEGy2Z2Cwj1vCeifHbB9FrChjvZDqOpYVc1T1byMDG86+YTYaJ64oh+bd5fz0OSlx+6XGWNMMxa24FDVTcA6Eenpms4ElgKTgeoro0YDb7vXk4FR7uqqk4Cd7lTWFOAcEWntBsXPcW1B6Zedys2n5/DG3CI+XLLp6H+YMcY0c+FeyOk24FURiQNWAdfihdVEEbkeWAtc7vq+B5wPFAJlri+qWiIijwCzXb+HVTWk62xvG5bLtGVb+MVbixjYuTXpyfFH+7uMMabZEtVvDRdEvLy8PD18BcCvNu3ior/M5MzjMnnm6gGI1DR0YowxzZeIzFHVvPr6Nck7x2vSq11LfnZ2D95fvInJC741RGKMMSZIzSY4AMac1o0BnVL5n38tZvOufX6XY4wxEalZBUd0lPD4Ff3YX1nFPZMW0hRP0xljTLg1q+AA6NomifvPO45PVhQzfta6+jcwxhhziGYXHAA/OqkzQ7un85t3l7J2W5nf5RhjTERplsERFSX88bK+RIvw80kLqKqyU1bGGBOsZhkcAB1TW/DA93oza3UJL85c7Xc5xhgTMZptcABcNjCLs45ryx+nLKdwy26/yzHGmIjQrINDRPjt908gKS6aOycuoKKyyu+SjDGm0WvWwQGQmZLAo5f0YWHRTp6ZsdLvcowxptFr9sEBcH6f9lzUtwNPTytg8fqdfpdjjDGNmgWH8/CI40lLiuOuiQsor6j0uxxjjGm0LDic1MQ4/nDpiSzfvJsnpq7wuxxjjGm0LDgCnNErk6sGZzP201XM+TqkmduNMabZsOA4zC8v6E3H1BbcOXEBZfsr/C7HGGMaHQuOwyTHx/DY5X1ZW1LG79//yu9yjDGm0QlrcIjIGhFZJCLzRSTftf1aRNa7tvkicn5A//tFpFBElovIuQHtw11boYjcF86a
AU7qls51Q7vy8n+/5rOCreHenTHGRJSGOOI4Q1X7Hbaq1JOurZ+qvgcgIr2BkcDxwHDgGRGJFpFo4H+B84DewFWub1jdfW5PcjKSuHvSAnbtOxDu3RljTMRoTKeqRgATVLVcVVfjrT0+2D0KVXWVqu4HJri+YZUQG83jV/Rjy+5yHpq8NNy7M8aYiBHu4FDgQxGZIyJjAtpvFZGFIvKiiLR2bR2BwAUyilxbbe2HEJExIpIvIvnFxcXHpPh+2ancfHoOb8wt4sMlm47JdxpjTKQLd3AMVdUBeKeZbhGR04BngRygH7AReNz1lRq21zraD21QHauqeaqal5GRcUyKB7htWC6927fkF28tYltp+TH7XmOMiVRhDQ5V3eCetwBvAYNVdbOqVqpqFfA3vFNR4B1JZAdsngVsqKO9QcTFRPHElX3ZtbeCX/1rsS03a4xp9sIWHCKSJCIp1a+Bc4DFItI+oNslwGL3ejIwUkTiRaQrkAvMAmYDuSLSVUTi8AbQJ4er7pr0ateSn53dg/cXb2LyggbLLGOMaZRiwvjdbYG3RKR6P/9U1Q9E5BUR6Yd3umkNcCOAqi4RkYnAUqACuEVVKwFE5FZgChANvKiqS8JYd43GnNaNqUs38T//WsyQrum0a5XQ0CUYY0yjIE3x1EteXp7m5+cf8+9dvXUP5z/1HwZ3TeOlawfhQtEYY5oEEZlz2K0TNWpMl+M2el3bJHH/+b34ZEUx42etq38DY4xpgiw4QvTDIZ0Z2j2d37y7lLXbyvwuxxhjGpwFR4iiooQ/XtaXaBF+PmkBVVVN71SfMcbUxYLjCHRMbcED3+vNrNUlvDhztd/lGGNMg7LgOEKXDczirOPa8scpyyncstvvcowxpsFYcBwhEeG33z+BpLho7py4gAOVVX6XZIwxDcKC4yhkpiTw6CV9WFi0k2dnrPS7HGOMaRAWHEfp/D7tGdGvA09PK2Dx+p1+l2OMMWFnwXEMPHTR8aQlxXHnxPmUV1T6XY4xxoSVBccxkJoYxx8uO5EVm0t5YuoKv8sxxpiwsuA4Rs7omclVg7MZ++kq8teU+F2OMcaEjQXHMfTLC3rTMbUFd72+gLL9FX6XY4wxYWHBcQwlx8fw2OV9WVtSxu/e+8rvcowxJiwsOI6xk7qlc93Qrrzyxdf8p+DYLGFrjDGNiQVHGNx9bk9yMpK4Z9JCdu494Hc5xhhzTIU1OERkjYgsEpH5IpLv2tJEZKqIFLjn1q5dRORpESkUkYUiMiDge0a7/gUiMjqcNR8LCbHRPHFFP7bsLufhfy/1uxxjjDmmGuKI4wxV7RewOMh9wDRVzQWmufcA5+EtF5sLjAGeBS9ogAeBIXjrkz9YHTaNWd/sVG45PYc35hbx4ZJNfpdjjDHHjB+nqkYA49zrccDFAe0vq+cLINWtT34uMFVVS1R1OzAVGN7QRR+JW4flcnyHlvzirUVsKy33uxxjjDkmwh0cCnwoInNEZIxra6uqGwHcc6Zr7wgELqtX5Npqa2/04mKiePyKvuzaW8Ev31pMU1ym1xjT/IQ7OIaq6gC801C3iMhpdfStaQFvraP90I1FxohIvojkFxc3nquZerVryc/O7sEHSzbx9vwNfpdjjDFHLazBoaob3PMW4C28MYrN7hQU7nmL614EZAdsngVsqKP98H2NVdU8Vc3LyMg41j/lqIw5rRsDOqXywNuL2bRzn9/lGGPMUQlbcIhIkoikVL8GzgEWA5OB6iujRgNvu9eTgVHu6qqTgJ3uVNYU4BwRae0Gxc9xbREjOkp4/Ip+HKhU7n1joZ2yMsZEtHAecbQFPhORBcAs4F1V/QD4PXC2iBQAZ7v3AO8Bq4BC4G/AzQCqWgI8Asx2j4ddW0Tp2iaJ+8/vxScrihk/a139GxhjTCMlTfFfv3l5eZqfn+93Gd9SVaWMenEWc9du54PbT6NTeqLfJRljzEEiMifg1ola2Z3jDSgqSvjjZScSLcLPX19AVVXTC21j
TNNnwdHAOqS24MGLjmfWmhJenLna73KMMSZkFhw+uHRAR846ri1/nLKcgs27/S7HGGNCYsHhAxHhd9/vQ1JcNHe9voADlVV+l2SMMUGz4PBJRko8j17Sh4VFO3nm45V+l2OMMUGz4PDR+X3aM6JfB/4yvYDF63f6XY4xxgTFgsNnD190AunJcdw5cT77DlT6XY4xxtTLgsNnrRJj+f2lJ7JicylPfrTC73KMMaZeFhyNwBk9M7lqcCfGfrqK/DURd1O8MaaZseBoJH55wXFktW7BXa8vYE95hd/lGGNMreoNDhHpISLTRGSxe3+iiPwq/KU1L8nxMfzpsr6sLSnjd+8v87scY4ypVTBHHH8D7gcOAKjqQmBkOItqrk7qls71Q7vyjy/W8ta8Ir/LMcaYGgUTHImqOuuwNjuXEib3DO/FkK5p3DtpEXO+3u53OcYY8y3BBMdWEcnBrbonIpcBG8NaVTMWFxPFcz8cSPvUBG58JZ+i7WV+l2SMMYcIJjhuAf4P6CUi64E7gJvCWlUz1zopjhdGD6K8oorrX8qn1AbLjTGNSJ3BISJRQJ6qngVkAL1U9VRV/TrYHYhItIjME5F33PuXRGS1iMx3j36uXUTkaREpFJGFIjIg4DtGi0iBe4yubV9NSffMZJ65egCFxaX8dPw8Km0KdmNMI1FncKhqFXCre71HVY9kKtfbgcMvE7pbVfu5x3zXdh6Q6x5jgGcBRCQNeBAYgrdm+YNuCdkm7zu5Gfz6e72Z/tUWfveeXWlljGkcgjlVNVVEfi4i2SKSVv0I5stFJAu4AHg+iO4jgJfV8wWQKiLtgXOBqapaoqrbganA8GD23xT86OQujDq5M89/tpoJs9b6XY4xxhATRJ/r3PMtAW0KdAti2z8D9wAph7U/KiIPANOA+1S1HOgIBC7GXeTaamtvNh64sDert+7hV/9aTOf0JE7OSfe7JGNMM1bvEYeqdq3hUW9oiMiFwBZVnXPYR/cDvYBBQBpwb/UmNe2+jvbD9zdGRPJFJL+4uLi+8iJKTHQUf/3BADqnJ/KTV+ewZusev0syxjRjwdw5HisiPxWRSe5xq4jEBvHdQ4GLRGQNMAEYJiL/UNWN7nRUOfB3vHEL8I4ksgO2zwI21NF+CFUdq6p5qpqXkZERRHmRpVWLWF68ZhACXDduNjvLDvhdkjGmmQpmjONZYCDwjHsMdG11UtX7VTVLVbvg3Wk+XVV/6MYtEBEBLgYWu00mA6Pc1VUnATtVdSMwBThHRFq7QfFzXFuz0zk9ied+OJB1JWXc8s+5tnKgMcYXwYxxDFLVvgHvp4vIgqPY56sikoF3Cmo+39wT8h5wPlAIlAHXAqhqiYg8Asx2/R5W1WY7heyQbuk8enEf7nljIQ/9ewmPjDgBL4ONMaZhBBMclSKSo6orAUSkGxDSikOqOgOY4V4Pq6WPcugAfOBnLwIvhrLPpuyKQdkUFpcy9tNV5GamMPqULn6XZIxpRoIJjruBj0VkFd5RQmfc0YDxz73De7GqeA8P/XsJXdok8d0eTW9cxxjTOAVzVdU0vJvyfuoePVX143AXZuoWHSU8NbIfPdqmcOurcynYfCT3ZhpjTOiCuarqFqCFqi5U1QVAoojcHP7STH2S4mN44ZpBxMdGc/24fEr27Pe7JGNMMxDMVVU3qOqO6jfu7u0bwleSCUXH1BaMHTWQTbv2cdMrcyivCGn4yRhjQhZMcERJwGU7IhINxIWvJBOqAZ1a86fLTmTWmhJ+9dZivOsMjDEmPIIZHJ8CTBSR5/Du2L4J+CCsVZmQjejXkZVbSnl6eiHdM5O58bs5fpdkjGmiggmOe/Fmq/0J3lVVHxLcpIWmgd1xVg9WFu/h9x98RbeMZM7u3dbvkowxTVAwV1VVqepzwA+A3wBvqaqdSG+EoqKExy7vS5+Orbh9wjyWbNjpd0nGmCao1uAQkedE5Hj3uhXeXd4vA/NE5KoGqs+EqEVcNM+PyqNlQiw3jMtn
y+59fpdkjGli6jri+I6qLnGvrwVWqGofvLmq7gl7ZeaIZbZM4PnReWwvO8ANL89h3wE7QDTGHDt1BUfgTQFnA/8CUNVNYa3IHBMndGzFk1f2Y8G6Hdw9aaFdaWWMOWbqCo4dInKhiPTHmyL9AwARiQFaNERx5ugMP6Edd5/bk38v2MDT0wr9LscY00TUdVXVjcDTQDvgjoAjjTOBd8NdmDk2bj49h5XFpTz50QpyMpO48MQOfpdkjIlwtQaHqq6ghrW9VXUKzXQ9jEgkIvzu+31Yu62MuyYuIKt1Iv2yU/0uyxgTwYK5c9xEuPiYaP7vRwPJSInnhpfz2bBjr98lGWMimAVHM5GeHM+L1wxi7/5Kfjwun7L9FX6XZIyJUGEPDhGJFpF5IvKOe99VRL4UkQIReU1E4lx7vHtf6D7vEvAd97v25SJybrhrbqp6tE3hLz/oz1ebdnHHhPlUVdmVVsaY0AUVHCJygYjcIyIPVD9C2MftwLKA938AnlTVXGA7cL1rvx7YrqrdgSddP0SkN96a5cfjjbk84yZaNEfgjJ6Z/PKC3ny4dDN/+nC53+UYYyJQMOtxPAdcCdyGN1fV5XirANZLRLKAC3BzW7lZdocBk1yXccDF7vUI9x73+Zmu/whggqqWq+pqvDXJBwezf1Oz64Z24arBnXh2xkomzSnyuxxjTIQJ5ojjFFUdhXc08BBwMpAd5Pf/Ge8u8yr3Ph3YoarVJ9iLgI7udUdgHYD7fKfrf7C9hm0OEpExIpIvIvnFxcVBltc8iQgPjzieU3LSuf/NhcxeU+J3ScaYCBJMcFRfglMmIh2AA0DX+jYSkQuBLao6J7C5hq5az2d1bfNNg+pYVc1T1byMDFt/uz6x0VE8c/UAsloncuMrc1i7rczvkowxESKY4HhHRFKBPwFzgTXAhCC2GwpcJCLV/YfhHYGkurvPAbKADe51Ee5Ixn3eCigJbK9hG3MUUhPjeGF0HpVVyvXjZrN73wG/SzLGRIBgplV/RFV3qOobeGMbvVT1f4LY7n5VzVLVLniD29NV9WrgY+Ay12008LZ7Pdm9x30+Xb0JliYDI91VV12BXGBW0L/Q1KlbRjLPXj2A1Vv3cNv4eVRUVtW/kTGmWatrWvVh7vn71Q+8ge4z3esjdS9wp4gU4o1hvODaXwDSXfudwH0AbobeicBSvPmybrH1QI6tU7q34aERxzNjeTGPvres/g2MMc1aXXNVfReYDnyvhs8UeDPYnajqDGCGe72KGq6KUtV9eFds1bT9o8Cjwe7PhO7qIZ0p3FLK32euoXtmMlcPCerCOWNMM1TXXFUPupcPu8tgD3KnjEwT86sLerN66x4eeHsJXdKTGNq9jd8lGWMaoWAGx9+ooW1SDW0mwkVHCX+5qj85GUn85B9zWFVc6ndJxphGqK4xjl4icinQKnCcQ0SuARIarELToFISYnlh9CBioqO4flw+O8r217+RMaZZqeuIoydwIZCKN85R/RgA3BD+0oxfstMS+b8fDWT99r3c/OpcDtiVVsaYALUGh6q+DfwYeFxVrw14/FRVP2+4Eo0fBnVJ43ff78PnK7fxwNtLbOlZY8xBdY5xuMtez26gWkwjc+nALH5yeg7jZ63lxZlr/C7HGNNI1HU5brXPReSvwGvAnupGVZ0btqpMo3H3OT1ZVVzKo+8upVubJM7olel3ScYYn0l9pyBE5OMamlVVh4WnpKOXl5en+fn5fpfRZJTtr+Dy5/7L19vKeOMnp9CzXYrfJRljwkBE5qhqXn39gply5IwaHo02NMyxlxgXw/Oj80iMi+b6cbPZWlrud0nGGB8Fsx5HWxF5QUTed+97i8j19W1nmpb2rVrwt1F5FO8u56ZX5lBeYbO+GNNcBXMD4EvAFKCDe78CuCNcBZnGq292Ko9f0Zf8r7dz/xuL7EorY5qpYIKjjapOxC3G5BZZsn9uNlMXntiBn53VgzfnreeZGSv9LscY44NgrqraIyLpuMWT
ROQkvNX5TDP10zO7s7K4lD9NWU5ORhLDT2jvd0nGmAYUTHDcibcmRo6IzAQy+GY9DdMMiQh/vOxE1paU8bPXFpDVOpETOrbyuyxjTAMJ5qqquXhTrJ8C3Agcr6oLw12YadwSYqMZO2ogrRNj+fG4fDbv2ud3ScaYBhLMGAd462f0xZun6ioRGVXfBiKSICKzRGSBiCwRkYdc+0sislpE5rtHP9cuIvK0iBSKyEIRGRDwXaNFpMA9Rte2T9OwMlMSeOGaQezad4AbXs5n734b+jKmOQjmctxXgMeAU4FB7lHvDSJAOTBMVfsC/YDhbnwE4G5V7ece813beXjLwuYCY4Bn3f7TgAeBIXgB9qCItA7y95kwO659S54a2Z9F63fy89cXUFVlV1oZ09QFM8aRB/TWEK+9dP2rF3SIdY+6vmME8LLb7gsRSRWR9sDpwFRVLQEQkanAcGB8KPWY8Dm7d1vuG96L373/FTkZSdx5Tk+/SzLGhFEwp6oWA+2O5MtFJFpE5gNb8P7y/9J99Kg7HfWkiMS7to7AuoDNi1xbbe2mERlzWjcuH5jF09MLmZi/zu7xMKYJC+o+DmCpiEwRkcnVj2C+XFUrVbUfkAUMFpETgPuBXninvNKAe113qekr6mg/hIiMEZF8EckvLi4OpjxzDIkIj17ShyFd07hn0kIuffZzZizfYgFiTBMUzKmqXx/tTlR1h4jMAIar6mOuuVxE/g783L0vArIDNssCNrj20w9rn1HDPsYCY8Gb5PBoazahi4uJ4uXrBzMxv4jnZqzkmr/Ppm9WK356Zi7DemUiUtO/AYwxkabe2XGP+ItFMoADLjRaAB8CfwDmqOpG8f4WeRLYp6r3icgFwK3A+XgD4U+r6mA3OD4H74ougLnAwOoxj5rY7Lj+219RxZtzi/jfGYWsK9nL8R1actuwXM7p3ZaoKAsQYxqjYGfHrfWIQ0R2U/NgtuCNfbes57vbA+NEJBrvlNhEVX1HRKa7UBFgPnCT6/8eXmgUAmXAtXg7KhGRR4DZrt/DdYWGaRziYqIYObgTlw7M4l/z1vO/Hxdy0z/m0KtdCrcNy+W8E9pZgBgTocJ2xOEnO+JofCoqq3hn4Ub+Mr2AlcV7yM1M5tZh3bnwxA5EW4AY0ygEe8RhwWEaVGWV8t4iL0BWbC6lW5skbjmjOyP6dSAmOtj7UY0x4WDBYcHRqFVVKVOWbOLp6YUs27iLTmmJ3HpGdy4Z0JFYCxBjfGHBYcEREVSVj5Zt4elpBSxav5OOqS24+YwcLhuYRXxMtN/lGdOsWHBYcEQUVWXG8mKemlbA/HU7aN8qgZ+cnsMVedkkxFqAGNMQLDgsOCKSqvJZ4VaenlbA7DXbyUyJ58bv5vCDwZ1oEWcBYkw4WXBYcEQ0VeWLVSU8Pa2A/67aRpvkOMac1o2rh3QmKT6Y+1aNMaGy4LDgaDJmrS7hL9ML+E/BVtKS4vjxd7oy6uQuJFuAGHNMWXBYcDQ5c9du5y/TCvh4eTGtWsRy/aldGX1KF1q1iPW7NGOaBAsOC44ma2HRDp6eVshHyzaTkhDDtUO7ct3QLqQmxvldmjERzYLDgqPJW7x+J3+dXsgHSzaRHB/DqJM78+PvdCMtyQLEmCNhwWHB0Wx8tWkXf51eyLuLNtIiNpofneQFSEZKfP0bG2MOsuCw4Gh2Crfs5q/TC5m8YANxMVH8YHBnbvxuN9q2TPC7NGMiggWHBUeztXrrHv7340Lemree6CjhqkHZ3HR6Du1btfC7NGMaNQsOC45mb+22Mp6ZUcikOUVEiXB5XhY/OT2HrNaJfpdmTKNkwWHBYZyi7WU898lKJs4uokqVSwdkcfMZOXROT/K7NGMalWCDI2zTkIpIgojMEpEFIrJERB5y7V1F5EsRKRCR10QkzrXHu/eF7vMuAd91v2tfLiLnhqtm0zRltU7kNxf34ZN7TueHJ3XmrfnrGfb4J9w1cQGrikv9Ls+YiBPO+avLgWGq2hfo
BwwXkZPwlo99UlVzge3A9a7/9cB2Ve2Ot6TsHwBEpDcwEjgeGA4841YVNCYk7Vu14NcXHc9n95zBNad04d1FGzjriU+4fcI8Cjbv9rs8YyJG2IJDPdX/nIt1DwWGAZNc+zjgYvd6hHuP+/xMty75CGCCqpar6mq8pWUHh6tu0/Rltkzgfy7szWf3DuOG07oxdelmzvnzp9zyz7l8tWmX3+UZ0+iFdcUcEYkWkfnAFmAqsBLYoaoVrksR0NG97gisA3Cf7wTSA9tr2MaYI9YmOZ77zzuOz+4dxs2n5/DJ8mKG//k/XPv3WUxZsokDlVV+l2hMoxTWWeJUtRLoJyKpwFvAcTV1c881LTytdbQfQkTGAGMAOnXqdET1muYpLSmOu8/txQ3f6cZLn69h/Ky13PjKHDJS4rlsYBYjB2XbQLoxARpkjU5V3QHMAE4CUkWkOrCygA3udRGQDeA+bwWUBLbXsE3gPsaqap6q5mVkZITjZ5gmLjUxjjvO6sHMe4fx/Kg8+ma14v8+Wcl3/zSDH/ztC96ev559Byr9LtMY34XtiENEMoADqrpDRFoAZ+ENeH8MXAZMAEYDb7tNJrv3/3WfT1dVFZHJwD9F5AmgA5ALzApX3cbEREdxVu+2nNW7LZt27mPSnHW8lr+O2yfMJzUxlkv6d+SqwZ3o0TbF71KN8UXY7uMQkRPxBruj8Y5sJqrqwyLSDS800oB5wA9VtVxEEoBXgP54RxojVXWV+65fAtcBFcAdqvp+Xfu2+zjMsVZVpXy+chvjZ6/lwyWbOFCpDOiUyshBnbiwb3sS42xtEBP57AZACw4TJiV79vPm3CLGz1rLyuI9JMfHcFG/DowclE2fjq3wLgY0JvJYcFhwmDBTVeZ8vZ3xs9bx7qIN7DtQRe/2LRk5OJsR/TraAlMm4lhwWHCYBrRr3wHenr+BCbPWsmTDLhJiozi/T3tGDurEoC6t7SjERAQLDgsO45PF63cyftZa3p6/gdLyCrplJDFyUDaXDsgiPdnWCDGNlwWHBYfxWdn+Ct5duJHXZq8j/+vtxEYL5/Rux5WDsjm1exuiouwoxDQuFhwWHKYRKdi8mwmz1/Hm3CK2lx2gY2oLrhyUzeV5WbZOiGk0LDgsOEwjVF5lR+eoAAAR+UlEQVRRyYdLNjNh9lpmFm4jSuCMnplcOSibYb0yiYlukHtyjalRsMFhF58b04DiY6L5Xt8OfK9vB9ZuK+O1/LW8nl/EtK+2kJkSz+V5WVyRZ1OcmMbNjjiM8VlFZRUfLy9mwqy1fLx8C1UKQ7unc+WgTpx7fFviY2wVAdMw7FSVBYeJQJt27uP1fG+Kk6Lte2mdGMv3B3gTLebaFCcmzCw4LDhMBKuqUmau3MqEWev4cKk3xcnAzq25clA2F55oU5yY8LDgsOAwTcS20nLenLue8bPXsipgipOrBnWiT1Yrv8szTYgFhwWHaWJUlfyvtzN+1lreW7SRfQeqOL5DS0YOymZE/460TLApTszRseCw4DBN2M69B5g8fz3jZ61j6cZvpji5anAn8jrbFCfmyFhwWHCYZkBVWbx+F+Nnr2Wym+IkIyWeoTnpDO3ehlNz29gNhiZoFhwWHKaZ2VNewQeLN/HJimI+X7mVraX7AeiWkcSp3dswtHsbTuqWbrP2mlpZcFhwmGZMVVm+eTefFWxlZuFWvlxdQtn+SqIE+mSlcmp374hkYOfWdp+IOcj34BCRbOBloB1QBYxV1adE5NfADUCx6/oLVX3PbXM/cD1QCfxUVae49uHAU3irCT6vqr+va98WHMYcan9FFfPX7eCzwq18XriVeet2UFmlJMRGMahL2sEjkt7tW9rki81YYwiO9kB7VZ0rIinAHOBi4AqgVFUfO6x/b2A8MBhvbfGPgB7u4xXA2UARMBu4SlWX1rZvCw5j6rZ73wFmrS7hs0LviGTF5lIAWifGckqOFyKndm9Dp/REnys1Dcn3uapUdSOw0b3eLSLLgI51bDIC
mKCq5cBqESnECxGAwoD1xye4vrUGhzGmbikJsZx5XFvOPK4tAFt27WPmyq3MLNzGZwVbeXfRRgCy01ow1AXJKTnptp6IARpokkMR6QL0B74EhgK3isgoIB+4S1W344XKFwGbFfFN0Kw7rH1ImEs2plnJbJnAJf2zuKR/FqrKqq17mFm49WCITJjt/RHs3b4lp+Z6QTK4Sxot4mx8pDkKe3CISDLwBnCHqu4SkWeBRwB1z48D1wE1nVhVoKZ5pr91fk1ExgBjADp16nRsijemGRIRcjKSyclIZtTJXaiorGLxhl0Hg+SlmWsY++kq4qKjGNA51TsiyW3DiR1b2bTwzURYr6oSkVjgHWCKqj5Rw+ddgHdU9QQ3MI6q/s59NgX4tev6a1U917Uf0q8mNsZhTPjs3V/J7DUlzCzcysyVW1myYReqkBIfw0k56QcH2nMykuxGxAjj+xiHeP+PeQFYFhgaItLejX8AXAIsdq8nA/8UkSfwBsdzgVl4RyK5ItIVWA+MBH4QrrqNMXVrERfNaT0yOK1HBgAle/bz35XbDg60T126GYB2LRM4pfs3QdK2ZYKfZZtjKJynqoYCPwIWich81/YL4CoR6Yd3umkNcCOAqi4RkYl4g94VwC2qWgkgIrcCU/Aux31RVZeEsW5jTAjSkuK44MT2XHBiewDWbitzA+1bmbG8mDfnrgcgNzP54NVaQ7qlkWJza0UsuwHQGBM2VVXKsk1ufKRwG7NWb2PfgSqio4S+Wa0OHo3079SauBgbH/Gb7/dx+MmCw5jGqbyiknlrd7gg2cqCdTuoUmgRG82QbmkMzWlD7w4t6Z6ZTGZKvI2RNDALDgsOYxq9nXsP8OWqbQeDZGXxnoOfpcTH0C0zme4ZyXTP/OaR3bqFXb0VJhYcFhzGRJzi3eUUbN5NYXEphVu8x8riUjbvKj/YJy46iq5tksjJTKJ7RjI5LlByMpJJiLX7So6G71dVGWNMqDJS4slIieeU7m0Oad+17wArXZAUFpeyckspSzfs4oPFm6hy//YVgazWLbwwOewoJTUxzodf03RZcBhjGr2WCbH079Sa/p1aH9JeXlHJmq1lB49Oqo9UPl+5jfKKqoP92iTHHRIm1a/bt0qwcZQjYMFhjIlY8THR9GyXQs92KYe0V1Yp67fvpbB4Nyu37DkYKu8s3MjOvQcO9kuKi/ZOdR12yqtzeiKxNo5SKwsOY0yTEx0ldEpPpFN6IsN6fdOuqmwt3X/IKa+VxaX8d9U23py3/mC/2Gihc3rStwbmu2UkkRhnf23afwFjTLMhIgfHUU7OST/ks9LyikPGUQq3lLJi826mLttMZdU3FxF1TG1x8CglMFTSkprPOIoFhzHGAMnxMfTNTqVvduoh7fsrqvh6255vjaNU38xYrXViLLmZKXRvm0xuZjI92qaQm5lMRhO8H8WCwxhj6hAXE0Vu2xRy2x46jlJVpazfsfeQU14Fm0t597BxlFYtYsnNTPa+IzOZ3LZeqETyDY4WHMYYcwSiooTstESy0xI5o2fmwXZVpbi0nILNpRRs3k3BFi9Q3l+8kfFl3wRKy4SYg2HSvfoIpW0y7Vo2/iu9LDiMMeYYEhEyUxLITElgaMD9KNUD8wVbdnuh4p4/XLr54EJZ4N0x371tMj0yvSCpDpXGdOmwBYcxxjSAwIH5U3IOvcFxW2k5KzaXUrhlNytcqHy0bDOv5X8TKMnxMXTPTD54uqv6aKVjaosGDxQLDmOM8Vl6cjwnJ3/7Sq9tpeXe1V1bSinc7IXKx8uLeX1O0cE+SXHRXqAEjKHkZqbQMbUFUVHhCRQLDmOMaaTSk+NJT45nSLdDA2X7nv3e2EnAaa9PVhQzKSBQEl2geEcpKfRwgZLV+ugDJZwrAGYDLwPtgCpgrKo+JSJpwGtAF7yFnK5Q1e1uxcCngPOBMuAaVZ3rvms08Cv31b9R1XHhqtsYYxq71klxDO6axuCuaYe07yjbf3AwvjpUZhZuPbiY
FkBCbJQ3bnLw0mEvVLJaJwa9/3AecVQAd6nqXBFJAeaIyFTgGmCaqv5eRO4D7gPuBc7DWy42FxgCPAsMcUHzIJCHt2rgHBGZrKrbw1i7McZEnNTEOAZ1SWNQl0MDZWfZAQqL3fiJC5XPVx56t3x8CAtphS043LriG93r3SKyDOgIjABOd93GATPwgmME8LJ687x/ISKpItLe9Z2qqiUALnyGA+PDVbsxxjQlrRJjGdg5jYGdDw2UXfsOUBAwKP9AkN/XIGMcItIF6A98CbR1oYKqbhSR6gugOwLrAjYrcm21tRtjjDkKLRNiGdi5NQM7e7MOBxscYZ/+UUSSgTeAO1R1V11da2jTOtoP388YEckXkfzi4uIjK9YYY0y9whocIhKLFxqvquqbrnmzOwWFe97i2ouA7IDNs4ANdbQfQlXHqmqequZlZGQc2x9ijDHmoLAFh7tK6gVgmao+EfDRZGC0ez0aeDugfZR4TgJ2ulNaU4BzRKS1iLQGznFtxhhjfBDOMY6hwI+ARSIy37X9Avg9MFFErgfWApe7z97DuxS3EO9y3GsBVLVERB4BZrt+D1cPlBtjjGl44l3E1LTk5eVpfn6+32UYY0xEEZE5qppXXz9bG9EYY0xILDiMMcaExILDGGNMSJrkGIeI7AaW+13HUWgDbPW7iKNg9fvL6vdPJNcO0FNVU+rr1FRnx10ezABPYyUi+Va/f6x+f0Vy/ZFcO3j1B9PPTlUZY4wJiQWHMcaYkDTV4BjrdwFHyer3l9Xvr0iuP5JrhyDrb5KD48YYY8KnqR5xGGOMCZMmFRwiki0iH4vIMhFZIiK3+11TKEQkQURmicgCV/9DftcUKhGJFpF5IvKO37UcCRFZIyKLRGR+sFeYNBZu8bNJIvKV+zNwst81BUtEerr/5tWPXSJyh991hUJEfub+3C4WkfEikuB3TaEQkdtd7Uvq+2/fpE5VuWna2wcuVwtcrKpLfS4tKG5G4SRVLXVT0n8G3K6qX/hcWtBE5E68ZX5bquqFftcTKhFZA+SpasRdiy8i44D/qOrzIhIHJKrqDr/rCpWIRAPrgSGq+rXf9QRDRDri/Xntrap7RWQi8J6qvuRvZcERkROACcBgYD/wAfATVS2oqX+TOuJQ1Y2qOte93g1UL1cbEdRT6t7GukfEJLuIZAEXAM/7XUtzIyItgdPwljJAVfdHYmg4ZwIrIyU0AsQALUQkBkikhnWDGrHjgC9UtUxVK4BPgEtq69ykgiPQYcvVRgx3qmc+3gJXU1U1kur/M3APUOV3IUdBgQ9FZI6IjPG7mBB0A4qBv7tThc+LSJLfRR2hkcB4v4sIhaquBx7DWypiI956Qh/6W1VIFgOniUi6iCTiLXGRXVvnJhkcISxX2+ioaqWq9sNb6XCwO4Rs9ETkQmCLqs7xu5ajNFRVBwDnAbeIyGl+FxSkGGAA8Kyq9gf2APf5W1Lo3Cm2i4DX/a4lFG6RuRFAV6ADkCQiP/S3quCp6jLgD8BUvNNUC4CK2vo3ueCoZbnaiONOM8wAhvtcSrCGAhe5MYIJwDAR+Ye/JYVOVTe45y3AW3jnfCNBEVAUcIQ6CS9IIs15wFxV3ex3ISE6C1itqsWqegB4EzjF55pCoqovqOoAVT0NKAFqHN+AJhYcdSxXGxFEJENEUt3rFnj/Z/zK36qCo6r3q2qWqnbBO9UwXVUj5l9cACKS5C6qwJ3mOQfvEL7RU9VNwDoR6emazgQi4qKQw1xFhJ2mctYCJ4lIovt76Ey8MdaIISKZ7rkT8H3q+N+hqU1yWONytar6no81haI9MM5dVRIFTFTViLysNUK1Bd7y/twTA/xTVT/wt6SQ3Aa86k73rMItvxwp3Ln1s4Eb/a4lVKr6pYhMAubineKZR+TdRf6GiKQDB4BbVHV7bR2b1OW4xhhjwq9JnaoyxhgTfhYcxhhjQmLBYYwxJiQWHMYYY0JiwWGMMSYkFhzGGGNCYsFhIpKIlAa8Pl9ECtyN
S4f3u1hEHmjY6momIj91052/GmT/GSKSdwT76Sci54deYa3f95GbUsMYwILDRDgRORP4CzBcVdfW0OUe4JmGrapWNwPnq+rVYd5PP7xJ6oLmZnStzSt4tRsDWHCYCCYi3wH+Blygqitr+LwHUF69toaIvCQiT4vI5yKySkQuc+2nBy48JSJ/FZFr3Os1IvJbEfmviOSLyAARmSIiK0XkplrqutMtiLO4ekEcEXkObwbbySLys8P6R4vIY24BqYUiclsN3xl4hHWZiLzkXl/u9rNARD51d40/DFzpFkS60k2l8qKIzHYz545w214jIq+LyL/xZgRu775jvvvO77hdTsabCsQYoOlNOWKaj3jgbeB0Va1tPq+heFNABGoPnAr0wvsLcVIQ+1qnqieLyJPAS+57E4AlwHOBHUVkIN5UH0MAAb4UkU9U9SYRGQ6cUcMiUWPwZlXtr6oVIpIWRE3VHgDOVdX1IpKqqvvdqbk8Vb3V1fRbvLnDrnNzoc0SkY/c9icDJ6pqiYjcBUxR1UfdtDeJAKq6XUTiRSRdVbeFUJtpouyIw0SqA8DnwPV19GmPt0ZFoH+papVbFbJtkPua7J4XAV+q6m5VLQb2VU9KGeBU4C1V3eMW5XoT+A51Owt4zi2gg6qWBFkXwEzgJRG5AYiupc85wH1u/rYZeKFXPR40NWB/s4FrReTXQB+3GFq1LXjThRtjwWEiVhVwBTBIRH5RS5+9eH9JBioPeC3uuYJD/yzUtk3VYdtX8e2jdiF0Qv0rPQZ+frA+Vb0J+BXeojvz3SR1NX3/parazz06ufUXwFu3o/q7PsVbRXA98IqIjDpsn3uD/UGmabPgMBFLVcuAC4GrRaSmI49lQPcgvuproLc7HdMKb0rsI/UpcLGbXjsJb/nN/9SzzYfATdUD1LWcqtosIseJSBQBS3qKSI6qfqmqDwBb8QJkN5ASsO0U4DY33Tci0r+mIkSkM95iXH/DW55ggGsXoB2wpp7fYZoJCw4T0dxpluHAr6oHfQN8CvSv/guzju9YB0wEFgKv4k2JfaT1zMUbB5mFt2zx86pa3/c9j7eew0IRWQD8oIY+9wHvANPxliat9ic3qL4Y7/cuAD7GC8L5InIl8Aje+vULXb9HaqnjdLyjlnnApcBTrn0g3nrUta4IZ5oXm1bdNGki8hTwb1X9qN7Opkbuv+FkVZ3mdy2mcbAjDtPU/RZ3dZA5YostNEwgO+IwxhgTEjviMMYYExILDmOMMSGx4DDGGBMSCw5jjDEhseAwxhgTkv8Hk0KKH4wPODIAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Fit KMeans for K = 2..9 on X and record each fitted model's inertia for the elbow plot.\n",
    "inertias = {}\n",
    "for k in range(2, 10):\n",
    "    # fit() returns the estimator itself, so kmeans is the fitted model for this k\n",
    "    kmeans = cluster.KMeans(n_clusters=k, random_state=1).fit(X)\n",
    "    inertias[k] = kmeans.inertia_\n",
    "\n",
    "pd.Series(inertias).plot()\n",
    "plt.xlabel(\"K (num of clusters)\")\n",
    "plt.ylabel(\"Inertia Score\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Final model with 5 clusters; store each row's cluster label on rfm.\n",
    "k = 5\n",
    "kmeans = cluster.KMeans(random_state=1, n_clusters=k)\n",
    "rfm[\"cluster\"] = kmeans.fit(X).labels_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2    1163\n",
       "1    1136\n",
       "0     777\n",
       "3     668\n",
       "4     595\n",
       "Name: cluster, dtype: int64"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rfm[\"cluster\"].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:5: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  \"\"\"\n",
      "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:5: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  \"\"\"\n",
      "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:5: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  \"\"\"\n",
      "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:5: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  \"\"\"\n",
      "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:5: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  \"\"\"\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>recency</th>\n",
       "      <th>frequency</th>\n",
       "      <th>monetary</th>\n",
       "      <th>r_score</th>\n",
       "      <th>m_score</th>\n",
       "      <th>f_score</th>\n",
       "      <th>cluster</th>\n",
       "      <th>distance</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CustomerID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>13591.0</th>\n",
       "      <td>64</td>\n",
       "      <td>58</td>\n",
       "      <td>1117.13</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0.835705</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13630.0</th>\n",
       "      <td>4</td>\n",
       "      <td>71</td>\n",
       "      <td>2031.78</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>0.763178</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16658.0</th>\n",
       "      <td>367</td>\n",
       "      <td>8</td>\n",
       "      <td>123.24</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0.692686</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14346.0</th>\n",
       "      <td>120</td>\n",
       "      <td>60</td>\n",
       "      <td>1021.58</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0.352840</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17119.0</th>\n",
       "      <td>2</td>\n",
       "      <td>35</td>\n",
       "      <td>606.91</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0.712960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14340.0</th>\n",
       "      <td>218</td>\n",
       "      <td>6</td>\n",
       "      <td>134.70</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0.692686</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14880.0</th>\n",
       "      <td>76</td>\n",
       "      <td>47</td>\n",
       "      <td>706.19</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0.645417</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15503.0</th>\n",
       "      <td>362</td>\n",
       "      <td>70</td>\n",
       "      <td>147.09</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>1.784894</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13721.0</th>\n",
       "      <td>35</td>\n",
       "      <td>28</td>\n",
       "      <td>524.14</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>0.973406</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15355.0</th>\n",
       "      <td>28</td>\n",
       "      <td>14</td>\n",
       "      <td>277.05</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>0.581352</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13089.0</th>\n",
       "      <td>2</td>\n",
       "      <td>1818</td>\n",
       "      <td>58825.83</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>0.595008</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13199.0</th>\n",
       "      <td>3</td>\n",
       "      <td>119</td>\n",
       "      <td>9817.12</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>0.697552</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14727.0</th>\n",
       "      <td>273</td>\n",
       "      <td>10</td>\n",
       "      <td>268.58</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0.534157</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15683.0</th>\n",
       "      <td>116</td>\n",
       "      <td>10</td>\n",
       "      <td>193.75</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0.769721</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13447.0</th>\n",
       "      <td>22</td>\n",
       "      <td>59</td>\n",
       "      <td>1129.14</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>0.662144</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14555.0</th>\n",
       "      <td>56</td>\n",
       "      <td>21</td>\n",
       "      <td>324.14</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>0.652340</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12890.0</th>\n",
       "      <td>23</td>\n",
       "      <td>82</td>\n",
       "      <td>380.47</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>1.002155</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16561.0</th>\n",
       "      <td>5</td>\n",
       "      <td>31</td>\n",
       "      <td>511.12</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0.712960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16363.0</th>\n",
       "      <td>52</td>\n",
       "      <td>5</td>\n",
       "      <td>109.36</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>0.728848</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17865.0</th>\n",
       "      <td>30</td>\n",
       "      <td>370</td>\n",
       "      <td>10526.32</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>0.458289</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            recency  frequency  monetary  r_score  m_score  f_score  cluster  \\\n",
       "CustomerID                                                                     \n",
       "13591.0          64         58   1117.13        3        4        3        0   \n",
       "13630.0           4         71   2031.78        5        4        4        2   \n",
       "16658.0         367          8    123.24        1        1        1        1   \n",
       "14346.0         120         60   1021.58        2        4        4        0   \n",
       "17119.0           2         35    606.91        5        3        3        3   \n",
       "14340.0         218          6    134.70        1        1        1        1   \n",
       "14880.0          76         47    706.19        2        3        3        0   \n",
       "15503.0         362         70    147.09        1        1        4        1   \n",
       "13721.0          35         28    524.14        3        3        2        3   \n",
       "15355.0          28         14    277.05        4        2        1        4   \n",
       "13089.0           2       1818  58825.83        5        5        5        2   \n",
       "13199.0           3        119   9817.12        5        5        4        2   \n",
       "14727.0         273         10    268.58        1        2        1        1   \n",
       "15683.0         116         10    193.75        2        1        1        1   \n",
       "13447.0          22         59   1129.14        4        4        4        2   \n",
       "14555.0          56         21    324.14        3        2        2        4   \n",
       "12890.0          23         82    380.47        4        2        4        3   \n",
       "16561.0           5         31    511.12        5        3        3        3   \n",
       "16363.0          52          5    109.36        3        1        1        4   \n",
       "17865.0          30        370  10526.32        4        5        5        2   \n",
       "\n",
       "            distance  \n",
       "CustomerID            \n",
       "13591.0     0.835705  \n",
       "13630.0     0.763178  \n",
       "16658.0     0.692686  \n",
       "14346.0     0.352840  \n",
       "17119.0     0.712960  \n",
       "14340.0     0.692686  \n",
       "14880.0     0.645417  \n",
       "15503.0     1.784894  \n",
       "13721.0     0.973406  \n",
       "15355.0     0.581352  \n",
       "13089.0     0.595008  \n",
       "13199.0     0.697552  \n",
       "14727.0     0.534157  \n",
       "15683.0     0.769721  \n",
       "13447.0     0.662144  \n",
       "14555.0     0.652340  \n",
       "12890.0     1.002155  \n",
       "16561.0     0.712960  \n",
       "16363.0     0.728848  \n",
       "17865.0     0.458289  "
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Distance from each customer to the centroid of its assigned cluster.\n",
    "rfm[\"distance\"] = 0.0\n",
    "for i in range(k):\n",
    "    in_cluster = rfm.cluster == i\n",
    "    centroid = kmeans.cluster_centers_[i].reshape(1, -1)\n",
    "    cluster_points = X[in_cluster]\n",
    "    # Assign through .loc so the write lands in rfm itself; the chained form\n",
    "    # rfm[\"distance\"][mask] = ... may write to a temporary copy and triggers\n",
    "    # pandas' SettingWithCopyWarning.\n",
    "    rfm.loc[in_cluster, \"distance\"] = metrics.euclidean_distances(centroid, cluster_points).flatten()\n",
    "rfm.sample(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>mean</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cluster</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.869923</td>\n",
       "      <td>777</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.755233</td>\n",
       "      <td>1136</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.705951</td>\n",
       "      <td>1163</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.835333</td>\n",
       "      <td>668</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.773771</td>\n",
       "      <td>595</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             mean  count\n",
       "cluster                 \n",
       "0        0.869923    777\n",
       "1        0.755233   1136\n",
       "2        0.705951   1163\n",
       "3        0.835333    668\n",
       "4        0.773771    595"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rfm.groupby(\"cluster\")[\"distance\"].agg([\"mean\", \"count\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
